-
Notifications
You must be signed in to change notification settings - Fork 14.9k
[ARM] Create DeadRegisterPass for ARM #155530
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Conversation
✅ With the latest revision this PR passed the C/C++ code formatter. |
@llvm/pr-subscribers-backend-arm Author: AZero13 (AZero13) Changes: This pass turns flag-setting adds, subs, ands, xors (eor), etc. whose destination register is dead into the corresponding compare/test instruction (cmn, cmp, tst, teq). The purpose of this pass is to do what we already do for AArch64; however, ARM has no zero register to send the dead result to, so the instruction is instead replaced with the variant that throws its result away. Patch is 390.01 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/155530.diff 59 Files Affected:
diff --git a/llvm/lib/Target/ARM/ARM.h b/llvm/lib/Target/ARM/ARM.h
index 3847f4e966afe..afc27082fc8e0 100644
--- a/llvm/lib/Target/ARM/ARM.h
+++ b/llvm/lib/Target/ARM/ARM.h
@@ -41,6 +41,7 @@ FunctionPass *createA15SDOptimizerPass();
FunctionPass *createARMLoadStoreOptimizationPass(bool PreAlloc = false);
FunctionPass *createARMExpandPseudoPass();
FunctionPass *createARMBranchTargetsPass();
+FunctionPass *createARMDeadRegisterDefinitions();
FunctionPass *createARMConstantIslandPass();
FunctionPass *createMLxExpansionPass();
FunctionPass *createThumb2ITBlockPass();
@@ -66,6 +67,7 @@ void initializeARMBlockPlacementPass(PassRegistry &);
void initializeARMBranchTargetsPass(PassRegistry &);
void initializeARMConstantIslandsPass(PassRegistry &);
void initializeARMDAGToDAGISelLegacyPass(PassRegistry &);
+void initializeARMDeadRegisterDefinitionsPass(PassRegistry &);
void initializeARMExpandPseudoPass(PassRegistry &);
void initializeARMFixCortexA57AES1742098Pass(PassRegistry &);
void initializeARMLoadStoreOptPass(PassRegistry &);
diff --git a/llvm/lib/Target/ARM/ARMDeadRegisterDefinitionsPass.cpp b/llvm/lib/Target/ARM/ARMDeadRegisterDefinitionsPass.cpp
new file mode 100644
index 0000000000000..bb6454a62c7f9
--- /dev/null
+++ b/llvm/lib/Target/ARM/ARMDeadRegisterDefinitionsPass.cpp
@@ -0,0 +1,242 @@
+//===- ARMDeadRegisterDefinitionsPass.cpp - Dead defs to compares ---------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+/// \file When allowed by the instruction, replace dead definitions with compare
+/// instructions.
+//===----------------------------------------------------------------------===//
+
+#include "ARM.h"
+#include "ARMBaseInstrInfo.h"
+#include "ARMBaseRegisterInfo.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include <optional>
+using namespace llvm;
+
+#define DEBUG_TYPE "arm-dead-defs-to-cmp"
+
+STATISTIC(NumDeadDefsReplaced, "Number of dead definitions replaced");
+
+#define ARM_DEAD_REG_DEF_NAME "ARM Convert dead defs to compares"
+
+namespace {
+/// Machine-function pass that visits every basic block of a function and, via
+/// processMachineBasicBlock, rewrites eligible instructions. It reports whether
+/// anything was changed and declares that it preserves the CFG.
+class ARMDeadRegisterDefinitions : public MachineFunctionPass {
+private:
+  // Per-function state, reassigned at the start of every runOnMachineFunction
+  // call. The default initializers make sure the members never hold
+  // indeterminate values before the first run.
+  const TargetRegisterInfo *TRI = nullptr;
+  const MachineRegisterInfo *MRI = nullptr;
+  const TargetInstrInfo *TII = nullptr;
+  // Set to true by processMachineBasicBlock when it replaces an instruction.
+  bool Changed = false;
+  void processMachineBasicBlock(MachineBasicBlock &MBB);
+
+public:
+  static char ID; // Pass identification, replacement for typeid.
+  ARMDeadRegisterDefinitions() : MachineFunctionPass(ID) {}
+
+  bool runOnMachineFunction(MachineFunction &F) override;
+
+  StringRef getPassName() const override { return ARM_DEAD_REG_DEF_NAME; }
+
+  // Instructions are only replaced in place, so the CFG is left untouched.
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    AU.setPreservesCFG();
+    MachineFunctionPass::getAnalysisUsage(AU);
+  }
+};
+char ARMDeadRegisterDefinitions::ID = 0;
+} // end anonymous namespace
+
+INITIALIZE_PASS(ARMDeadRegisterDefinitions, "arm-dead-defs-to-cmp",
+ ARM_DEAD_REG_DEF_NAME, false, false)
+
+/// Returns true if at least one operand in MI.uses() is a frame index.
+static bool usesFrameIndex(const MachineInstr &MI) {
+  return llvm::any_of(MI.uses(),
+                      [](const MachineOperand &Op) { return Op.isFI(); });
+}
+
+/// Looks up the compare/test opcode paired with \p Opc: SUB maps to CMP, ADD to
+/// CMN, AND to TST and EOR to TEQ, for the ARM, Thumb2 and (a few) Thumb1
+/// opcodes listed below. Returns std::nullopt for every other opcode.
+static std::optional<unsigned> mapToCmpCmnTstTeqOpcode(unsigned Opc) {
+  struct OpcodePair {
+    unsigned ALUOpc;
+    unsigned CmpOpc;
+  };
+  static constexpr OpcodePair Table[] = {
+      // ARM encodings
+      {ARM::SUBri, ARM::CMPri},
+      {ARM::SUBrr, ARM::CMPrr},
+      {ARM::SUBrsi, ARM::CMPrsi},
+      {ARM::SUBrsr, ARM::CMPrsr},
+
+      {ARM::ADDri, ARM::CMNri},
+      {ARM::ADDrr, ARM::CMNzrr},
+      {ARM::ADDrsi, ARM::CMNzrsi},
+      {ARM::ADDrsr, ARM::CMNzrsr},
+
+      {ARM::ANDri, ARM::TSTri},
+      {ARM::ANDrr, ARM::TSTrr},
+      {ARM::ANDrsi, ARM::TSTrsi},
+      {ARM::ANDrsr, ARM::TSTrsr},
+
+      {ARM::EORri, ARM::TEQri},
+      {ARM::EORrr, ARM::TEQrr},
+      {ARM::EORrsi, ARM::TEQrsi},
+      {ARM::EORrsr, ARM::TEQrsr},
+
+      // Thumb2 encodings
+      {ARM::t2SUBri, ARM::t2CMPri},
+      {ARM::t2SUBrr, ARM::t2CMPrr},
+      {ARM::t2SUBrs, ARM::t2CMPrs},
+
+      {ARM::t2ADDri, ARM::t2CMNri},
+      {ARM::t2ADDrr, ARM::t2CMNzrr},
+      {ARM::t2ADDrs, ARM::t2CMNzrs},
+
+      {ARM::t2ANDri, ARM::t2TSTri},
+      {ARM::t2ANDrr, ARM::t2TSTrr},
+      {ARM::t2ANDrs, ARM::t2TSTrs},
+
+      {ARM::t2EORri, ARM::t2TEQri},
+      {ARM::t2EORrr, ARM::t2TEQrr},
+      {ARM::t2EORrs, ARM::t2TEQrs},
+
+      // Thumb1 limited support
+      {ARM::tSUBSrr, ARM::tCMPr},
+      {ARM::tSUBSi3, ARM::tCMPi8},
+      {ARM::tSUBSi8, ARM::tCMPi8},
+      {ARM::tAND, ARM::tTST},
+  };
+
+  for (const OpcodePair &Entry : Table)
+    if (Entry.ALUOpc == Opc)
+      return Entry.CmpOpc;
+  return std::nullopt;
+}
+
+/// Appends to \p Dst the input operands of \p Src: every explicit operand that
+/// follows the defs, up to (not including) the first predicate operand, and
+/// then the two operands starting at the predicate index.
+///
+/// Anything after that predicate pair is not copied, and neither are implicit
+/// operands.
+static void copyNonDefNonPredOperands(MachineInstr &Dst,
+                                      const MachineInstr &Src) {
+  const unsigned FirstInput = Src.getDesc().getNumDefs();
+  const int PIdx = Src.findFirstPredOperandIdx();
+
+  // Without a predicate operand, stop at the last explicit operand so that the
+  // implicit operands of Src are left out, exactly as they are on the
+  // predicated path. NOTE(review): Dst is expected to have received the
+  // implicit operands of its own opcode when it was built - confirm.
+  const unsigned InputsEnd = (PIdx == -1) ? Src.getNumExplicitOperands()
+                                          : static_cast<unsigned>(PIdx);
+  for (unsigned I = FirstInput; I < InputsEnd; ++I)
+    Dst.addOperand(Src.getOperand(I));
+
+  if (PIdx == -1)
+    return;
+
+  // The predicate is copied as a pair of operands: PIdx and PIdx + 1.
+  assert(static_cast<unsigned>(PIdx) + 1 < Src.getNumOperands() &&
+         "Predicate operand pair runs past the end of the instruction!");
+  Dst.addOperand(Src.getOperand(PIdx));
+  Dst.addOperand(Src.getOperand(PIdx + 1));
+}
+
+/// Returns the virtual register written by the first explicit def of \p MI
+/// that is either marked dead or has no non-debug uses, and that is not tied
+/// to a use operand. Returns an invalid Register if no def qualifies.
+static Register findUnusedUntiedDef(const MachineInstr &MI,
+                                    const MachineRegisterInfo &MRI) {
+  for (unsigned I = 0, E = MI.getDesc().getNumDefs(); I != E; ++I) {
+    const MachineOperand &MO = MI.getOperand(I);
+    if (!MO.isReg() || !MO.isDef())
+      continue;
+    Register Reg = MO.getReg();
+    if (!Reg.isVirtual() || (!MO.isDead() && !MRI.use_nodbg_empty(Reg)))
+      continue;
+    assert(!MO.isImplicit() && "Unexpected implicit def!");
+    // A def tied to a use cannot simply be dropped.
+    if (MI.isRegTiedToUseOperand(I))
+      continue;
+    return Reg;
+  }
+  return Register();
+}
+
+/// Replaces, within \p MBB, every instruction that
+///   - has a compare/test counterpart (see mapToCmpCmnTstTeqOpcode),
+///   - does not reference a frame index,
+///   - defines CPSR, and
+///   - has an unused, untied virtual-register def
+/// with that compare/test instruction. Sets Changed and bumps
+/// NumDeadDefsReplaced for every replacement.
+void ARMDeadRegisterDefinitions::processMachineBasicBlock(
+    MachineBasicBlock &MBB) {
+  for (auto MII = MBB.begin(), E = MBB.end(); MII != E;) {
+    // Step past the instruction first so that erasing it below is safe.
+    MachineInstr &MI = *MII++;
+
+    // The opcode lookup is the cheapest filter, so do it before scanning the
+    // operands.
+    const std::optional<unsigned> NewOpc =
+        mapToCmpCmnTstTeqOpcode(MI.getOpcode());
+    if (!NewOpc)
+      continue;
+
+    if (usesFrameIndex(MI))
+      continue;
+
+    // Only consider instructions that set CPSR (flag-setting variants).
+    if (!ARMBaseInstrInfo::isCPSRDefined(MI))
+      continue;
+
+    const Register DeadReg = findUnusedUntiedDef(MI, *MRI);
+    if (!DeadReg.isValid())
+      continue;
+
+    // Insert the new compare before the current instruction.
+    MachineInstrBuilder MIB =
+        BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(*NewOpc));
+    copyNonDefNonPredOperands(*MIB.getInstr(), MI);
+    MIB.setMIFlags(MI.getFlags());
+    for (MachineMemOperand *MMO : MI.memoperands())
+      MIB.addMemOperand(MMO);
+
+    // The register may still have debug uses (only non-debug uses were
+    // checked). Its definition is about to disappear, so mark those debug
+    // values undef instead of leaving them pointing at an undefined register.
+    MRI->markUsesInDebugValueAsUndef(DeadReg);
+
+    MI.eraseFromParent();
+    ++NumDeadDefsReplaced;
+    Changed = true;
+  }
+}
+
+// Pass entry point. Unless the function is skipped, refresh the cached
+// per-function state and process every basic block, replacing instructions
+// whose register result is unused with their compare/test counterparts.
+// Returns true if at least one instruction was replaced.
+bool ARMDeadRegisterDefinitions::runOnMachineFunction(MachineFunction &MF) {
+  if (skipFunction(MF.getFunction()))
+    return false;
+
+  LLVM_DEBUG(dbgs() << "***** ARMDeadRegisterDefinitions *****\n");
+
+  const TargetSubtargetInfo &STI = MF.getSubtarget();
+  TRI = STI.getRegisterInfo();
+  TII = STI.getInstrInfo();
+  MRI = &MF.getRegInfo();
+  Changed = false;
+
+  for (MachineBasicBlock &MBB : MF)
+    processMachineBasicBlock(MBB);
+
+  return Changed;
+}
+
+/// Creates a new instance of the ARM dead-register-definitions pass; the caller
+/// receives the freshly allocated pass object.
+FunctionPass *llvm::createARMDeadRegisterDefinitions() {
+  FunctionPass *Pass = new ARMDeadRegisterDefinitions();
+  return Pass;
+}
diff --git a/llvm/lib/Target/ARM/ARMTargetMachine.cpp b/llvm/lib/Target/ARM/ARMTargetMachine.cpp
index fedf9e2cf34b1..d4da3783adf6b 100644
--- a/llvm/lib/Target/ARM/ARMTargetMachine.cpp
+++ b/llvm/lib/Target/ARM/ARMTargetMachine.cpp
@@ -79,6 +79,14 @@ static cl::opt<cl::boolOrDefault>
EnableGlobalMerge("arm-global-merge", cl::Hidden,
cl::desc("Enable the global merge pass"));
+static cl::opt<bool> EnableARMDeadRegisterElimination(
+ "arm-enable-dead-defs", cl::Hidden,
+ cl::desc("Enable the pass that replaces"
+ " dead-dest flag-setting ALU"
+ " instructions with compares/tests"
+ " pre-RA"),
+ cl::init(true));
+
namespace llvm {
void initializeARMExecutionDomainFixPass(PassRegistry&);
}
@@ -510,6 +518,10 @@ bool ARMPassConfig::addGlobalInstructionSelect() {
void ARMPassConfig::addPreRegAlloc() {
if (getOptLevel() != CodeGenOptLevel::None) {
+ // Replace dead-dest flag-setting ALU with compares/tests pre-RA.
+ if (EnableARMDeadRegisterElimination)
+ addPass(createARMDeadRegisterDefinitions());
+
if (getOptLevel() == CodeGenOptLevel::Aggressive)
addPass(&MachinePipelinerID);
diff --git a/llvm/lib/Target/ARM/CMakeLists.txt b/llvm/lib/Target/ARM/CMakeLists.txt
index a39629bd8aeb0..e770867b8ce25 100644
--- a/llvm/lib/Target/ARM/CMakeLists.txt
+++ b/llvm/lib/Target/ARM/CMakeLists.txt
@@ -30,6 +30,7 @@ add_llvm_target(ARMCodeGen
ARMCallLowering.cpp
ARMConstantIslandPass.cpp
ARMConstantPoolValue.cpp
+ ARMDeadRegisterDefinitionsPass.cpp
ARMExpandPseudoInsts.cpp
ARMFastISel.cpp
ARMFixCortexA57AES1742098Pass.cpp
diff --git a/llvm/test/CodeGen/ARM/O3-pipeline.ll b/llvm/test/CodeGen/ARM/O3-pipeline.ll
index 960d7305e66f6..4e9b8db39f0d4 100644
--- a/llvm/test/CodeGen/ARM/O3-pipeline.ll
+++ b/llvm/test/CodeGen/ARM/O3-pipeline.ll
@@ -103,6 +103,7 @@
; CHECK-NEXT: Machine code sinking
; CHECK-NEXT: Peephole Optimizations
; CHECK-NEXT: Remove dead machine instructions
+; CHECK-NEXT: ARM Convert dead defs to compares
; CHECK-NEXT: MachineDominator Tree Construction
; CHECK-NEXT: Slot index numbering
; CHECK-NEXT: Live Interval Analysis
diff --git a/llvm/test/CodeGen/ARM/addsubcarry-promotion.ll b/llvm/test/CodeGen/ARM/addsubcarry-promotion.ll
index 9d07ed655eb99..12c39cb00c5f9 100644
--- a/llvm/test/CodeGen/ARM/addsubcarry-promotion.ll
+++ b/llvm/test/CodeGen/ARM/addsubcarry-promotion.ll
@@ -11,7 +11,7 @@ define void @fn1(i32 %a, i32 %b, i32 %c) local_unnamed_addr #0 {
; ARM-LABEL: fn1:
; ARM: @ %bb.0: @ %entry
; ARM-NEXT: rsb r2, r2, #0
-; ARM-NEXT: adds r0, r1, r0
+; ARM-NEXT: cmn r1, r0
; ARM-NEXT: movw r1, #65535
; ARM-NEXT: sxth r2, r2
; ARM-NEXT: adc r0, r2, #1
@@ -54,7 +54,7 @@ define void @fn1(i32 %a, i32 %b, i32 %c) local_unnamed_addr #0 {
; THUMB-LABEL: fn1:
; THUMB: @ %bb.0: @ %entry
; THUMB-NEXT: rsbs r2, r2, #0
-; THUMB-NEXT: adds r0, r0, r1
+; THUMB-NEXT: cmn r1, r0
; THUMB-NEXT: sxth r2, r2
; THUMB-NEXT: adc r0, r2, #1
; THUMB-NEXT: lsls r0, r0, #16
diff --git a/llvm/test/CodeGen/ARM/addsubo-legalization.ll b/llvm/test/CodeGen/ARM/addsubo-legalization.ll
index 5ebb115791c66..5fb5629f0eee7 100644
--- a/llvm/test/CodeGen/ARM/addsubo-legalization.ll
+++ b/llvm/test/CodeGen/ARM/addsubo-legalization.ll
@@ -18,7 +18,7 @@ define <2 x i1> @uaddo(ptr %ptr, ptr %ptr2) {
; CHECK-NEXT: vmov r6, r7, d19
; CHECK-NEXT: vmov lr, r12, d16
; CHECK-NEXT: vmov r4, r5, d17
-; CHECK-NEXT: subs.w r3, lr, r3
+; CHECK-NEXT: cmp lr, r3
; CHECK-NEXT: sbcs.w r2, r12, r2
; CHECK-NEXT: mov.w r2, #0
; CHECK-NEXT: it lo
@@ -26,7 +26,7 @@ define <2 x i1> @uaddo(ptr %ptr, ptr %ptr2) {
; CHECK-NEXT: cmp r2, #0
; CHECK-NEXT: it ne
; CHECK-NEXT: movne.w r2, #-1
-; CHECK-NEXT: subs r3, r4, r6
+; CHECK-NEXT: cmp r4, r6
; CHECK-NEXT: sbcs.w r3, r5, r7
; CHECK-NEXT: it lo
; CHECK-NEXT: movlo r1, #1
@@ -57,7 +57,7 @@ define <2 x i1> @usubo(ptr %ptr, ptr %ptr2) {
; CHECK-NEXT: vmov r4, r5, d19
; CHECK-NEXT: vmov r3, r2, d16
; CHECK-NEXT: vmov r6, r7, d17
-; CHECK-NEXT: subs.w r3, lr, r3
+; CHECK-NEXT: cmp lr, r3
; CHECK-NEXT: sbcs.w r2, r12, r2
; CHECK-NEXT: mov.w r2, #0
; CHECK-NEXT: it lo
@@ -65,7 +65,7 @@ define <2 x i1> @usubo(ptr %ptr, ptr %ptr2) {
; CHECK-NEXT: cmp r2, #0
; CHECK-NEXT: it ne
; CHECK-NEXT: movne.w r2, #-1
-; CHECK-NEXT: subs r3, r4, r6
+; CHECK-NEXT: cmp r4, r6
; CHECK-NEXT: sbcs.w r3, r5, r7
; CHECK-NEXT: it lo
; CHECK-NEXT: movlo r1, #1
diff --git a/llvm/test/CodeGen/ARM/atomic-64bit.ll b/llvm/test/CodeGen/ARM/atomic-64bit.ll
index ca9939c0f8c55..919122cc7ef31 100644
--- a/llvm/test/CodeGen/ARM/atomic-64bit.ll
+++ b/llvm/test/CodeGen/ARM/atomic-64bit.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc < %s -mtriple=armv7-apple-ios | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-LE
; RUN: llc < %s -mtriple=thumbv7-none-linux-gnueabihf | FileCheck %s --check-prefix=CHECK-THUMB --check-prefix=CHECK-THUMB-LE
; RUN: llc < %s -mtriple=armebv7 -target-abi apcs | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-BE
@@ -6,214 +7,529 @@
; RUN: llc < %s -mtriple=armv8m--none-eabi | FileCheck %s --check-prefix=CHECK-M
define i64 @test1(ptr %ptr, i64 %val) {
-; CHECK-LABEL: test1:
-; CHECK: dmb {{ish$}}
-; CHECK: ldrexd [[REG1:(r[0-9]?[02468])]], [[REG2:(r[0-9]?[13579])]]
-; CHECK-LE: adds [[REG3:(r[0-9]?[02468])]], [[REG1]]
-; CHECK-LE: adc [[REG4:(r[0-9]?[13579])]], [[REG2]]
-; CHECK-BE: adds [[REG4:(r[0-9]?[13579])]], [[REG2]]
-; CHECK-BE: adc [[REG3:(r[0-9]?[02468])]], [[REG1]]
-; CHECK: strexd {{[a-z0-9]+}}, [[REG3]], [[REG4]]
-; CHECK: cmp
-; CHECK: bne
-; CHECK: dmb {{ish$}}
-
-; CHECK-THUMB-LABEL: test1:
-; CHECK-THUMB: dmb {{ish$}}
-; CHECK-THUMB: ldrexd [[REG1:[a-z0-9]+]], [[REG2:[a-z0-9]+]]
-; CHECK-THUMB-LE: adds.w [[REG3:[a-z0-9]+]], [[REG1]]
-; CHECK-THUMB-LE: adc.w [[REG4:[a-z0-9]+]], [[REG2]]
-; CHECK-THUMB-BE: adds.w [[REG4:[a-z0-9]+]], [[REG2]]
-; CHECK-THUMB-BE: adc.w [[REG3:[a-z0-9]+]], [[REG1]]
-; CHECK-THUMB: strexd {{[a-z0-9]+}}, [[REG3]], [[REG4]]
-; CHECK-THUMB: cmp
-; CHECK-THUMB: bne
-; CHECK-THUMB: dmb {{ish$}}
-
-; CHECK-M: __atomic_fetch_add_8
+; CHECK-LE-LABEL: test1:
+; CHECK-LE: @ %bb.0:
+; CHECK-LE-NEXT: push {r4, r5, r6, r7, lr}
+; CHECK-LE-NEXT: dmb ish
+; CHECK-LE-NEXT: LBB0_1: @ %atomicrmw.start
+; CHECK-LE-NEXT: @ =>This Inner Loop Header: Depth=1
+; CHECK-LE-NEXT: ldrexd r4, r5, [r0]
+; CHECK-LE-NEXT: adds r6, r4, r1
+; CHECK-LE-NEXT: adc r7, r5, r2
+; CHECK-LE-NEXT: strexd r3, r6, r7, [r0]
+; CHECK-LE-NEXT: cmp r3, #0
+; CHECK-LE-NEXT: bne LBB0_1
+; CHECK-LE-NEXT: @ %bb.2: @ %atomicrmw.end
+; CHECK-LE-NEXT: mov r0, r4
+; CHECK-LE-NEXT: mov r1, r5
+; CHECK-LE-NEXT: dmb ish
+; CHECK-LE-NEXT: pop {r4, r5, r6, r7, pc}
+;
+; CHECK-THUMB-LE-LABEL: test1:
+; CHECK-THUMB-LE: @ %bb.0:
+; CHECK-THUMB-LE-NEXT: .save {r4, r5, r7, lr}
+; CHECK-THUMB-LE-NEXT: push {r4, r5, r7, lr}
+; CHECK-THUMB-LE-NEXT: mov r12, r0
+; CHECK-THUMB-LE-NEXT: dmb ish
+; CHECK-THUMB-LE-NEXT: .LBB0_1: @ %atomicrmw.start
+; CHECK-THUMB-LE-NEXT: @ =>This Inner Loop Header: Depth=1
+; CHECK-THUMB-LE-NEXT: ldrexd r0, r1, [r12]
+; CHECK-THUMB-LE-NEXT: adds.w lr, r0, r2
+; CHECK-THUMB-LE-NEXT: adc.w r4, r1, r3
+; CHECK-THUMB-LE-NEXT: strexd r5, lr, r4, [r12]
+; CHECK-THUMB-LE-NEXT: cmp r5, #0
+; CHECK-THUMB-LE-NEXT: bne .LBB0_1
+; CHECK-THUMB-LE-NEXT: @ %bb.2: @ %atomicrmw.end
+; CHECK-THUMB-LE-NEXT: dmb ish
+; CHECK-THUMB-LE-NEXT: pop {r4, r5, r7, pc}
+;
+; CHECK-BE-LABEL: test1:
+; CHECK-BE: @ %bb.0:
+; CHECK-BE-NEXT: push {r4, r5, r6, r7, lr}
+; CHECK-BE-NEXT: dmb ish
+; CHECK-BE-NEXT: .LBB0_1: @ %atomicrmw.start
+; CHECK-BE-NEXT: @ =>This Inner Loop Header: Depth=1
+; CHECK-BE-NEXT: ldrexd r4, r5, [r0]
+; CHECK-BE-NEXT: adds r7, r5, r2
+; CHECK-BE-NEXT: adc r6, r4, r1
+; CHECK-BE-NEXT: strexd r3, r6, r7, [r0]
+; CHECK-BE-NEXT: cmp r3, #0
+; CHECK-BE-NEXT: bne .LBB0_1
+; CHECK-BE-NEXT: @ %bb.2: @ %atomicrmw.end
+; CHECK-BE-NEXT: mov r0, r4
+; CHECK-BE-NEXT: mov r1, r5
+; CHECK-BE-NEXT: dmb ish
+; CHECK-BE-NEXT: pop {r4, r5, r6, r7, pc}
+;
+; CHECK-THUMB-BE-LABEL: test1:
+; CHECK-THUMB-BE: @ %bb.0:
+; CHECK-THUMB-BE-NEXT: .save {r4, r5, r7, lr}
+; CHECK-THUMB-BE-NEXT: push {r4, r5, r7, lr}
+; CHECK-THUMB-BE-NEXT: mov r12, r0
+; CHECK-THUMB-BE-NEXT: dmb ish
+; CHECK-THUMB-BE-NEXT: .LBB0_1: @ %atomicrmw.start
+; CHECK-THUMB-BE-NEXT: @ =>This Inner Loop Header: Depth=1
+; CHECK-THUMB-BE-NEXT: ldrexd r0, r1, [r12]
+; CHECK-THUMB-BE-NEXT: adds.w lr, r1, r3
+; CHECK-THUMB-BE-NEXT: adc.w r4, r0, r2
+; CHECK-THUMB-BE-NEXT: strexd r5, r4, lr, [r12]
+; CHECK-THUMB-BE-NEXT: cmp r5, #0
+; CHECK-THUMB-BE-NEXT: bne .LBB0_1
+; CHECK-THUMB-BE-NEXT: @ %bb.2: @ %atomicrmw.end
+; CHECK-THUMB-BE-NEXT: dmb ish
+; CHECK-THUMB-BE-NEXT: pop {r4, r5, r7, pc}
+
+
%r = atomicrmw add ptr %ptr, i64 %val seq_cst
ret i64 %r
}
define i64 @test2(ptr %ptr, i64 %val) {
-; CHECK-LABEL: test2:
-; CHECK: dmb {{ish$}}
-; CHECK: ldrexd [[REG1:(r[0-9]?[02468])]], [[REG2:(r[0-9]?[13579])]]
-; CHECK-LE: subs [[REG3:(r[0-9]?[02468])]], [[REG1]]
-; CHECK-LE: sbc [[REG4:(r[0-9]?[13579])]], [[REG2]]
-; CHECK-BE: subs [[REG4:(r[0-9]?[13579])]], [[REG2]]
-; CHECK-BE: sbc [[REG3:(r[0-9]?[02468])]], [[REG1]]
-; CHECK: strexd {{[a-z0-9]+}}, [[REG3]], [[REG4]]
-; CHECK: cmp
-; CHECK: bne
-; CHECK: dmb {{ish$}}
-
-; CHECK-THUMB-LABEL: test2:
-; CHECK-THUMB: dmb {{ish$}}
-; CHECK-THUMB: ldrexd [[REG1:[a-z0-9]+]], [[REG2:[a-z0-9]+]]
-; CHECK-THUMB-LE: subs.w [[REG3:[a-z0-9]+]], [[REG1]]
-; CHECK-THUMB-LE: sbc.w [[REG4:[a-z0-9]+]], [[REG2]]
-; CHECK-THUMB-BE: subs.w [[REG4:[a-z0-9]+]], [[REG2]]
-; CHECK-THUMB-BE: sbc.w [[REG3:[a-z0-9]+]], [[REG1]]
-; CHECK-THUMB: strexd {{[a-z0-9]+}}, [[REG3]], [[REG4]]
-; CHECK-THUMB: cmp
-; CHECK-THUMB: bne
-; CHECK-THUMB: dmb {{ish$}}
-
-; CHECK-M: __atomic_fetch_sub_8
+; CHECK-LE-LABEL: test2:
+; CHECK-LE: @ %bb.0:
+; CHECK-LE-NEXT: push {r4, r5, r6, r7, lr}
+; CHECK-LE-NEXT: dmb ish
+; CHECK-LE-NEXT: LBB1_1: @ %atomicrmw.start
+; CHECK-LE-NEXT: @ =>This Inner Loop Header: Depth=1
+; CHECK-LE-NEXT: ldrexd r4, r5, [r0]
+; CHECK-LE-NEXT: subs r6, r4, r1
+; CHECK-LE-NEXT: sbc r7, r5, r2
+; CHECK-LE-NEXT: strexd r3, r6, r7, [r0]
+; CHECK-LE-NEXT: cmp r3, #0
+; CHECK-LE-NEXT: bne LBB1_1
+; CHECK-LE-NEXT: @ %bb.2: @ %atomicrmw.end
+; CHECK-LE-NEXT: mov r0, r4
+; CHECK-LE-NEXT: mov r1, r5
+; CHECK-LE-NEXT: dmb ish
+; CHECK-LE-NEXT: pop {r4, r5, r6, r7, pc}
+;
+; CHECK-THUMB-LE-LABEL: test2:
+; CHECK-THUMB-LE: @ %bb.0:
+; CHECK-THUMB-LE-NEXT: .save {r4, r5, r7, lr}
+; CHECK-THUMB-LE-NEXT: push {r4, r5, r7, lr}
+; CHECK-THUMB-LE-NEXT: mov r12, r0
+; CHECK-THUMB-LE-NEXT: dmb ish
+; CHECK-THUMB-LE-NEXT: .LBB1_1: @ %atomicrmw.start
+; CHECK-THUMB-LE-NEXT: @ =>This Inner Loop Header: Depth=1
+; CHECK-THUMB-LE-NEXT: ldrexd r0, r1, [r12]
+; CHECK-THUMB-LE-NEXT: subs.w lr, r0, r2
+; CHECK-THUMB-LE-NEXT: sbc.w r4, r1, r3
+; CHECK-THUMB-LE-NEXT: strexd r5, lr, r4, [r12]
+; CHECK-THUMB-LE-NEXT: cmp r5, #0
+; CHECK-THUMB-LE-NEXT: bne .LBB1_1
+; CHECK-THUMB-LE-NEXT: @ %bb.2: @ %atomicrmw.end
+; CHECK-THUMB-LE-NEXT: dmb ish
+; CHECK-THUMB-LE-NEXT: pop {r4, r5, r7, pc}
+;
+; CHECK-BE-LABEL: test2:
+; CHECK-BE: @ %bb.0:
+; CHECK-BE-NEXT: push {r4, r5, r6, r7, lr}
+; CHECK-BE-NEXT: dmb ish
+; CHECK-BE-NEXT: .LBB1_1: @ %atomicrmw.start
+; CHECK-BE-NEXT: @ =>This Inner Loop Header: Depth=1
+; CHECK-BE-NEXT: ldrexd r4, r5, [r0]
+; CHECK-BE-NEXT: subs r7, r5, r2
+; CHECK-BE-NEXT: sbc r6, r4, r1
+; CHECK-BE-NEXT: strexd r3, r6, r7, [r0]
+; CHECK-BE-NEXT: cmp r3, #0
+; CHECK-BE-NEXT: bne ....
[truncated]
|
7c5a15d
to
95c240d
Compare
This pass turns flag-setting adds, subs, ands, xors (eor), etc. whose destination register is dead into the corresponding compare/test instruction (cmn, cmp, tst, teq). The purpose of this pass is to do what we already do for AArch64; however, ARM has no zero register to send the dead result to, so the instruction is instead replaced with the variant that throws its result away.
@arsemn |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Hi - How many of these changes could be optimized in SDAG?
These are byproducts of the eliminations done by the peephole optimizer, so SelectionDAG itself does not have this problem. |
By the way to answer your question: I was only able to optimize one: and we don't do it in aarch64 because it does more harm than good and only fixed two tests: but even then:
But I'm not trying to do a major rewrite; I'm trying to justify each step one at a time. As of now, the peephole optimizer leaves destination registers orphaned. On AArch64 this isn't a problem, because we have the dead-register pass and the compare instructions are aliases of the flag-setting ones; on ARM they are different opcodes. |
Basically, when we remove a redundant instruction we sometimes end up orphaning its destination register. One could argue that the peephole optimizer should handle this itself, but it is easier for me to adapt the dead-register pass to ARM by replacing the instruction wholesale than to work that logic into the existing peephole code. |
This pass turns flag-setting adds, subs, ands, xors (eor), etc. whose destination register is dead into the corresponding compare/test instruction (cmn, cmp, tst, teq).
The purpose of this pass is to do what we already do for AArch64; however, ARM has no zero register to send the dead result to, so the instruction is instead replaced with the variant that throws its result away.